## Loading required package: MASS
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
## Loading required package: strucchange
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## Loading required package: sandwich
## 
## Attaching package: 'strucchange'
## The following object is masked from 'package:stringr':
## 
##     boundary
## Loading required package: urca
## Loading required package: lmtest
## 
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
## 
##     group_rows

Granger causality analysis of all data data.

So far, all of the neuroscientific evidence in favor of the CMC comes from a series of Dynamic Causal Modeling (DCM) analysis of task-based fMRI. The results are certainly convincing, but:

  1. We need converging evidence from another method, to make sure that the the only method used so far (DCM) is not intrinsically biased in favor of a CMC-like architecture. Using a converging method is actually Goal #2 in AFOSR grant.

  2. Although DCM is an elegant tool, it has its fair share of detractors, and two acknowledged downsides.

    2.1. It relies on many assumptions. For instance, DCM also depends on the how task events are modeled are how they are designed to drive neural activity.

    2.2. DCM can only be used in a top-down way, examining different models that need to be chosen beforehand. This is contrast to the most common bottom-up approach i the neurosciences, in which the brain’s architecture is inferred from an analysis of the data.

So, we need a method to estimate effective (i.e., directed) connectivity from data

Theory

Let’s see if we can get CMC from it. The

VOIs <- c('Action', 
          'LTM', 
          'Perception', 
          'Procedural', 
          'WM')

TASKS <- c('Gambling',
           'Relational',
           'Social',
           'WM',
           'Language',
           'Emotion')

R <- length(VOIs) # Num ROIs

GC analysis at the individual level

First, we need to perform GC analysis on all data.

df <- NULL

for (task in TASKS) {
  for (sub in dir(task)[grep("sub-", dir(task))]) {
    M <-read_tsv(paste(task, sub, "cmc.txt", sep="/"), 
                 col_names = VOIs, 
                 col_types = cols(
                   Action = col_double(),
                   LTM = col_double(),
                   Perception = col_double(),
                   Procedural = col_double(),
                   WM = col_double()
                 ))
    
    # Select the best lag, using BIC or "Schwartz Criterion"
    
    lag <- VARselect(M)$selection[3]
    
    gm <- VAR(M, type="none", p = lag)
    
    coef <- coefficients(gm)
    
    for (v in 1:R) {
      voi <- VOIs[v]
      
      Estimates <- coef[[v]][,1][1 : (R * lag)]  # estimates
      Pvalues <- coef[[v]][,4][1 : (R * lag)]  # p-values
      
      subdf <- data.frame(Task = rep(task, R * lag),
                          Subject = rep(sub, R * lag),
                          To = rep(voi, R * lag),
                          From = rep(VOIs, lag),
                          Lag = sort(rep(1:lag, R)),
                          Estimate = Estimates,
                          p = Pvalues)
      if (is.null(df)) {
        df <- subdf
      } else {
        df <- rbind(df, subdf)
      }
    }
  }
}
  
granger_data_complete <- as_tibble(df)

Now, because we have different Lags, we are going to pick the smallest p value for each region across all lags. We are also going to translate these p-values into binary connections, stored in the “Link” variable. If a connection has a p-value < 0.05, it is considered an effective connection and part of that participant’s architecture.

granger_data <- granger_data_complete %>%
  group_by(Task, Subject, To, From) %>%
  summarise(p = min(p),
            Estimate = mean(Estimate)) %>%
  mutate(Link = if_else(p < 0.05, 1, 0))
## `summarise()` has grouped output by 'Task', 'Subject', 'To'. You can override using the `.groups` argument.

Visualize Some Data

Here is an example of the four timeseries

data <- M %>%
  add_column(Time = 1:nrow(M)) %>%
  pivot_longer(cols = c(Action, LTM, Perception, Procedural, WM),
               names_to = "Region",
               values_to = "BOLD")

ggplot(data, aes(x=Time, y=BOLD, col=Region)) +
  geom_line() +
  scale_color_brewer(palette="Set2") +
  facet_wrap(~ Region) +
  theme_pander()

And here are the p values associated with each connection, in each participant, for each task.

for (task in TASKS) {
  p <- ggplot(filter(granger_data, 
                Task == task),
         aes(x=From, y=To)) +
    geom_tile(aes(fill = p), col="white") +
    scale_fill_viridis_c(option="inferno") +
    ggtitle("Probability of Connection") +
    coord_equal(ratio = 1) +
    facet_wrap(~ Subject) +
    theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
    ggtitle(task) +
    theme_pander() 
  print(p)
}

# ggplot(granger_data, aes(x=From, y=To)) +
#   geom_tile(aes(fill=Link), col="white") +
#   scale_fill_viridis_c(option="inferno", end = 0.8) +
#   ggtitle("Inferred Architecture") +
#   facet_wrap(Subject ~ Task ) +
#   coord_equal(ratio = 1) +
#   theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
#   theme_pander() 

Now, we can look at the distribution of lags across tasks and participants:

lags <- granger_data_complete %>%
  group_by(Task, Subject) %>%
  summarise(Lag = max(Lag))
## `summarise()` has grouped output by 'Task'. You can override using the `.groups` argument.
ggplot(lags, aes(x=Lag, fill=Task)) +
  geom_bar(position = "dodge", 
           col="white",
           width=0.75) +
  scale_fill_jama() +
  theme_pander()

Inferring Architectures

To infer an architecture from a distribution of p-values, we need a few statistical functions. Here are a few that will be computed.

Mode <- function(codes) {
  which.max(tabulate(codes))
}

fisher.chi <- function(pvals) {
    logsum <- -2 * sum(log(pvals))
    1 - pchisq(logsum, df = (2 * length(pvals)))
}

friston.test <- function(pvals) {
    max(pvals) ** length(pvals)
}

nichols.test <- function(pvals) {
    max(pvals)
}

tippet.test <- function(pvals) {
  s <- min(pvals)
  1 - (1-s)**length(pvals)
}

binom.test.cmc <- function(binvals) {
  binom.test(sum(binvals), 
             n=length(binvals),
             alternative = "less")$p.value
}

And now we can aggregate the data by task, using the functions above.

group_data <- granger_data %>%
  group_by(Task, To, From) %>%
  summarize(p.nichols = nichols.test(p),
            p.friston = friston.test(p),
            Plink = binom.test.cmc(Link),
            Sign = sum(2*Link - 1),
            Weight = mean(Estimate)) %>%
  mutate(Link = if_else(Plink > 0.95, 1, 0),
         FristonLink = if_else(p.friston < 0.05, 1, 0))
## `summarise()` has grouped output by 'Task', 'To'. You can override using the `.groups` argument.

Task-level Architectures

Here are the task-level functional architectures.

ggplot(group_data,
       aes(x=From, y=To)) +
  geom_tile(aes(fill=Plink), col="white") +
  labs(fill="Probability") +
  facet_wrap(~ Task) +
  scale_fill_viridis_c(option="inferno", end = 0.8) +
  ggtitle("Task-Specific Architectures") +
  coord_equal(ratio = 1) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  theme_pander() +
  theme(legend.position = "bottom")

ggplot(group_data,
       aes(x=From, y=To)) +
  geom_tile(aes(fill=Link), col="white") +
  labs(fill="Probability") +
  facet_wrap(~ Task) +
  scale_fill_viridis_c(option="inferno", end = 0.8) +
  ggtitle("Task-Specific Architectures") +
  coord_equal(ratio = 1) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  theme_pander() +
  theme(legend.position = "bottom")

General Architecture

Now, let’s calculate an architecture across all tasks. We will use majority rule: If a connection apperas in at least half of the tasks, it will be considered part of the architecture.

architecture <- group_data %>%
  group_by(To, From) %>%
  summarise(Link = if_else(sum(Link) >= 3, 1, 0),
            PFisher = 1 - fisher.chi(1 - Plink)) %>%
  add_column(Connectivity = "Empirical")
## `summarise()` has grouped output by 'To'. You can override using the `.groups` argument.
ggplot(architecture,
       aes(x=From, y=To)) +
  geom_tile(aes(fill=PFisher), col="white") +
  scale_fill_viridis_c(option="inferno", end = 0.8) +
  ggtitle("Domain-General Architecture") +
  coord_equal(ratio = 1) +
  labs(fill="Probability") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  theme_pander() 

Comparising to CMC

And now we can compare it to the CMC theory.

architectures <- read_tsv("architectures.tsv",
                          col_types = cols(
                            From = col_character(),
                            To = col_character(),
                            Link = col_double()
                          ))  
cmc <- architectures %>%
  filter(Model == "CMC")

cmc <- cmc %>%
  add_column(Connectivity = "CMC Theory")

archi <- architecture %>%
  dplyr::select(To, From, PFisher, Connectivity) %>%
  mutate(Link = if_else(PFisher > 0.999, 1, 0)) %>%
  dplyr::select(To, From, Link, Connectivity)

comparison <- full_join(archi,
                          cmc)
## Joining, by = c("To", "From", "Link", "Connectivity")
ggplot(comparison,
       aes(x=From, y=To)) +
  geom_tile(aes(fill=Link), col="white") +
  facet_wrap(~ Connectivity) +
  scale_fill_viridis_c(option="inferno", end = 0.8) +
  ggtitle("Predicted vs. Observed Architecture") +
  coord_equal(ratio = 1) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  theme_pander() 

The CMC differs from the empirically observed connectivity only in 3 of the 25 predictions. Assuming that the proability of a connection is 14/25 (the number of connections predicted by the theory), the probability of making 22 correct identifications out of 25 is binom.test(22, 25, p = 14/25).

Other architectures

But what about other architectures? Here, we will consider the six competitor architectures examined in the NeuroImage paper. Here is their matrix representation.

architectures$Model <- factor(architectures$Model,
                              levels = c("Hub PFC", "Hub Temporal", "Hub Procedural",
                                         "Hierarchical 1", "Hierarchical 2", "Hierarchical 3",
                                         "CMC"))
ggplot(architectures,
       aes(x=From, y=To)) +
  geom_tile(aes(fill=Link), col="white") +
  facet_wrap(~ Model, nrow=3) +
  ggtitle("Connectivity Matrix Representation\nof the Different Architectures ") +
  scale_fill_viridis_c(option="inferno", end = 0.8) +
#  ggtitle("Predicted vs. Observed Architecture") +
  coord_equal(ratio = 1) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  theme_pander() +
  theme(legend.position = "NA")

How good do they do? To compare them, we will use three different metrics. The first is the degree of Overlap, or the proportion of correctly predicted connections. The second is the Correlation between the vectors of observed and predicted connections. The third is the Z-score, or the standard deviations of the distance between the number of correctly predicted connections and the number expected by chance.

Here is a table with the corresponding metrics:

mbinom <- function(x) {
  binom.test(x, 25)$p.value
}

mbinom <- Vectorize(mbinom)

model_comparisons <- architectures %>%
  group_by(Model) %>%
  summarise(Correlation = cor(Link, archi$Link) ** 2,
            Overlap = (25 - sum(abs(Link - archi$Link)))/25) %>%
  mutate(Z = qnorm(1-mbinom(Overlap * 25))) 


model_comparisons %>%
  xtable() %>%
  kable(digits=3) %>%
  kable_styling(bootstrap_options = c("hover", "striped"))
Model Correlation Overlap Z
Hub PFC 0.510 0.84 3.118
Hub Temporal 0.001 0.52 -Inf
Hub Procedural 0.100 0.36 0.740
Hierarchical 1 0.001 0.52 -Inf
Hierarchical 2 0.040 0.60 0.191
Hierarchical 3 0.040 0.60 0.191
CMC 0.599 0.88 3.604

and here is a visual representation of the performance of the seven architectures. As shown, the CMC outperforms all of the other competitors.

model_names <- c("CMC", 
                 "Hierarchical 1", 
                 "Hierarchical 2",
                 "Hierarchical 3",
                 "Hub PFC", 
                 "Hub Procedural",
                 "Hub Temporal"
  )

model_colors <- c("red3", 
                  "palegreen4", "palegreen3", "palegreen2",
                  #"goldenrod4", "goldenrod3", "goldenrod2",
                  "deepskyblue4", "deepskyblue3", "deepskyblue2")


model_comparisons %>%
  mutate(Z = if_else(Z > 0, Z, 0)) %>%
  pivot_longer(cols=c(Overlap, Correlation, Z),
               names_to = "Measure",
               values_to = "Value") -> lmodel_comparisons

lmodel_comparisons$Model <- factor(lmodel_comparisons$Model,
                                   levels = model_names)

ggplot(lmodel_comparisons, aes(x=Model, y= Value)) +
  geom_col(aes(fill=Model, col=Model), alpha=0.5) +
  #scale_fill_jama() +
  scale_fill_manual(values = model_colors) +
  scale_color_manual(values = model_colors) +
  facet_wrap(~ Measure, scales = "free_y", nrow=3) +
  theme_pander() +
  ggtitle("Comparison of All Architectures") +
  theme(axis.text.x = element_text(angle=90, hjust=1)) +
  geom_text(aes(label=format(round(Value, 2), nsmall = 2)),
            position = position_stack(vjust = 0.5)) +
  theme(legend.position = "NA",
        axis.text.x = element_text(angle=45))